In [1]:
# Imports live in the first cell so the notebook survives Restart Kernel -> Run All.
import numpy as np
import pandas as pd

# Load the preprocessed table from CSV.
df = pd.read_csv('../resource/preprocess_dist_df.csv')
# Column labels converted to floats (raises if a label is not numeric).
# Captured here, before any extra column is added to df.
col = df.columns.astype(float).values
df.head()
Out[1]:
In [2]:
# Row total across the value columns, stored as column 'sum'.
# A 'sum' column left over from an earlier run of this cell is excluded first,
# so re-running the cell does not fold the old total into the new one.
df['sum'] = df.drop('sum', axis=1, errors='ignore').sum(axis=1)
df.head()
Out[2]:
In [3]:
# Scratch arithmetic on hand-typed literals (not derived from any notebook variable).
# NOTE(review): presumably a manual spot check of one count against its row total — confirm or remove.
1312 / 58122
Out[3]:
In [4]:
# p(X): divide every value column by the row total in df['sum'].
# `.ix` was removed in pandas 1.0; `.loc` performs the same inclusive label slice.
# The slice stops at label '5', which leaves out the 'sum' column appended after it.
prob_df = df.loc[:, :'5'].div(df['sum'], axis=0)
prob_df.head()
Out[4]:
In [5]:
# Display the float-converted column labels (the X values multiplied with p(X)).
col
Out[5]:
In [6]:
# X * p(X): weight each probability by the numeric value of its column label.
# Only the value columns (up to label '5') are used, so this cell still works when
# it is re-run after a 'mean' column has been added to prob_df; otherwise the
# number of columns would no longer match len(col).
Xprob_df = prob_df.loc[:, :'5'].mul(col)
Xprob_df.head()
Out[6]:
In [7]:
# E[X] per row: add up the X * p(X) terms across the columns.
mean = Xprob_df.sum(axis='columns')
mean.head()
Out[7]:
In [8]:
# Keep the per-row expectation next to the probabilities, as column 'mean'.
# Note: this adds a column to prob_df in place.
prob_df['mean'] = Xprob_df.sum(axis='columns')
prob_df.head()
Out[8]:
In [9]:
# (X - u): deviation of every X value from its row's mean.
# The means are reshaped to an (n_rows, 1) column and broadcast against the 1-D
# array of X values, which builds the whole table in one step instead of
# appending one row per loop iteration. Rows are numbered 0..n_rows-1, as before.
sub_df = pd.DataFrame(col - prob_df['mean'].values[:, None], columns=col)
sub_df.head()
Out[9]:
In [10]:
# (X - u)^2, computed as one vectorized multiply instead of a Python call per
# element (DataFrame.applymap is also deprecated in recent pandas releases).
sub2_df = sub_df * sub_df
sub2_df.head()
Out[10]:
In [11]:
# Scratch arithmetic on hand-typed literals.
# NOTE(review): looks like a manual check of a single (X - u)^2 * p(X) term — confirm or remove.
13.482005*0.000537
Out[11]:
In [12]:
# var(X) = sum over X of (X - u)^2 * p(X)
# sub2_df carries float column labels, while prob_df is sliced with the string
# label '5'. The probabilities are therefore passed as a plain array and multiplied
# by position; label alignment between float and string labels could otherwise
# find no common columns and yield NaN products.
# `.loc` replaces `.ix`, which was removed in pandas 1.0.
var = sub2_df.mul(prob_df.loc[:, :'5'].values).sum(axis=1)
var.head()
Out[12]:
In [13]:
# Scratch check: square root of a hand-typed literal.
# NOTE(review): presumably a value copied from var.head() to verify std by hand — confirm or remove.
np.sqrt(1.121530)
Out[13]:
In [14]:
# Standard deviation: element-wise square root of the variance Series.
std = np.sqrt(var)
std.head()
Out[14]:
In [15]:
# (X - u)^3, computed with vectorized multiplies in the same (x*x)*x order as the
# original per-element lambda (DataFrame.applymap is deprecated in recent pandas).
sub3_df = sub_df * sub_df * sub_df
sub3_df.head()
Out[15]:
In [16]:
# sigma^3: the cube of each row's standard deviation
std3 = std.map(lambda sigma: pow(sigma, 3))
std3.head()
Out[16]:
In [17]:
# (X - u)^3 / sigma^3: divide each row of cubed deviations by that row's cubed
# standard deviation (std3 is matched to the rows by index).
skew_df = sub3_df.div(std3, axis='index')
skew_df.head()
Out[17]:
In [18]:
# skew(X) = sum over X of ((X - u) / sigma)^3 * p(X)
# skew_df carries float column labels, while prob_df is sliced with the string
# label '5'. The probabilities are therefore passed as a plain array and multiplied
# by position; label alignment between float and string labels could otherwise
# find no common columns and yield NaN products.
# `.loc` replaces `.ix`, which was removed in pandas 1.0.
skew = skew_df.mul(prob_df.loc[:, :'5'].values).sum(axis=1)
skew.head()
Out[18]:
In [19]:
# Collect the three per-row statistics side by side; `keys` names the columns
# in the order mean, std, skew.
expect_df = pd.concat([mean, std, skew], axis=1, keys=['mean', 'std', 'skew'])
expect_df.head()
Out[19]:
In [20]:
# Write expect_df to CSV; index=False leaves the row index out of the file.
expect_df.to_csv('../resource/preprocess_expectation_df.csv', index=False)